# Replication code for Taylor C. Boas, Dino P. Christenson, and David M. Glick, "Recruiting Large Online Samples in the United States and India: Facebook, Mechanical Turk and Qualtrics," Political Science Research and Methods.

# Analysis conducted in R 3.4.3 on MacOS 10.13.2

# NOTE: This file produces figures related to the spatial distribution of Indian respondents: Main Text Figure 3 and Appendix Figures 5-7. Files should be run in the following order; please see readme.txt for details.
# 	1. clean_us_survey.R
# 	2. clean_india_survey.R
# 	3. merge_external_data_us.R
# 	4. merge_external_data_india.R
# 	5. analyze_demographics.R
# 	6. analyze_spaces.R
# 	7. analyze_politics.R
# 	8. analyze_cooperativeness.R
# 	9. analyze_experiments.R

# Set working directory as appropriate
# setwd('~/Dropbox/sample recruitment shared/replication/')

# Clear the workspace and load packages. Please make sure all necessary packages are installed.
# ls(all.names = TRUE) also lists dot-prefixed (hidden) objects, so every object in the workspace is removed.
rm(list = ls(all.names = TRUE))
library(foreign)
library(survey)
library(sp)
library(raster)

# Load survey data (the code below expects this file to provide the data frame `india1`)
load("india_completions_augmented.RData")

#######################################
# Appendix Figures 5-7: India Heat Maps
#######################################

# Pull latitude, longitude and the sample label out of india1, one set of vectors per sample.

# MTurk
i.lat.mt    <- india1$lat[india1$sample == "MTurk"]
i.long.mt   <- india1$long[india1$sample == "MTurk"]
i.sample.mt <- india1$sample[india1$sample == "MTurk"]

# Facebook
i.lat.fb    <- india1$lat[india1$sample == "Facebook"]
i.long.fb   <- india1$long[india1$sample == "Facebook"]
i.sample.fb <- india1$sample[india1$sample == "Facebook"]

# Qualtrics
i.lat.qt    <- india1$lat[india1$sample == "Qualtrics"]
i.long.qt   <- india1$long[india1$sample == "Qualtrics"]
i.sample.qt <- india1$sample[india1$sample == "Qualtrics"]

# REPLICATION NOTE: The commented-out lines below query the Database of Global Administrative Areas (GADM) to retrieve polygons for drawing state and district borders in India. Since we do not control the GADM servers, and administrative boundaries are subject to change, we cannot guarantee that this process is replicable. Hence, the replication file loads the polygons that we obtained in February 2018 using these commented-out lines of code. Uncomment the lines below to re-generate these data.

# IN1 <- getData("GADM", country = "IND", level = 1)
# IN2 <- getData("GADM", country = "IND", level = 2)
# save(IN1, IN2, file = "india_polygons.RData")
# Restore the saved polygon objects (IN1 and IN2 are used by the plotting code below).
load(file = "india_polygons.RData")

# Heat Map Function 
heatMap <- function(data, shape = NULL, col = "blue", main = "Sample HeatMap") {
  # Plots a choropleth ("heat map") of the number of points per polygon.
  #   data:   SpatialPointsDataFrame holding the points to be counted
  #   shape:  polygons object (e.g. a SpatialPolygonsDataFrame) defining the
  #           regions; required even though the signature defaults to NULL
  #   col:    colour at the high end of the white-to-`col` ramp
  #   main:   plot title, passed through to spplot()
  #   Value:  the trellis object returned by spplot(). Nothing is drawn until
  #           that object is printed (automatically at top level, or with an
  #           explicit print() elsewhere, e.g. under source()).
  #   Notes:  requires the sp package. The colour ramp comes from grDevices,
  #           so RColorBrewer is not needed.
  if (!requireNamespace("sp", quietly = TRUE)) {
    stop("sp package is not installed")
  }
  # is() rather than class(data) == "..." so that subclasses are accepted too
  if (!methods::is(data, "SpatialPointsDataFrame")) {
    stop("data argument is not SpatialPointsDataFrame")
  }
  if (is.null(shape)) {
    stop("shape argument is missing: a polygons object is required")
  }

  n_poly <- length(shape)

  # over() gives, for each point, the index of the polygon that contains it
  # (NA when it lies in none); tabulate() turns those indices into one count
  # per polygon and ignores the NAs.
  poly_index <- sp::over(methods::as(data, "SpatialPoints"),
                         methods::as(shape, "SpatialPolygons"))
  freq_table <- data.frame(counts = tabulate(poly_index, nbins = n_poly))

  # Relabel the polygons "1".."n" so their IDs line up with the row names of
  # freq_table, which is what match.ID = TRUE matches on.
  shape1 <- sp::spChFIDs(shape, as.character(seq_len(n_poly)))
  spdf <- sp::SpatialPolygonsDataFrame(shape1, freq_table, match.ID = TRUE)

  # One colour per count level, but never fewer than two: a largest count of
  # 0 or 1 would otherwise give an empty or a white-only palette.
  n_colors <- max(freq_table$counts, 2L)
  rw.colors <- grDevices::colorRampPalette(c("white", col))
  sp::spplot(spdf, scales = list(draw = TRUE),
             col.regions = rw.colors(n_colors), main = main)
}

# Build one spatial points data frame per sample. A column beyond long and lat
# (the sample label) is carried along, and rows with any missing value are
# dropped before the coordinate columns are declared.

# MTurk
spp.data.mt <- na.exclude(data.frame(i.lat.mt, i.long.mt, i.sample.mt))
coordinates(spp.data.mt) <- c("i.long.mt", "i.lat.mt")

# Facebook
spp.data.fb <- na.exclude(data.frame(i.lat.fb, i.long.fb, i.sample.fb))
coordinates(spp.data.fb) <- c("i.long.fb", "i.lat.fb")

# Qualtrics
spp.data.qt <- na.exclude(data.frame(i.lat.qt, i.long.qt, i.sample.qt))
coordinates(spp.data.qt) <- c("i.long.qt", "i.lat.qt")

# Plot choropleth map for all india
# For each sample: label the points with the CRS string of the state polygons
# (this assigns the CRS, it does not reproject), open the PDF device, draw, close.
# heatMap() returns a lattice (trellis) object, which is only drawn when printed;
# the explicit print() keeps the PDFs from coming out empty when this script is
# run through source(), where top-level values are not auto-printed.
proj4string(spp.data.mt) <- proj4string(IN1)
pdf("india_mt_heatmap.pdf", height = 7, width = 7)
print(heatMap(spp.data.mt, IN1, col = "red", main = "MTurk"))
dev.off()

proj4string(spp.data.fb) <- proj4string(IN1)
pdf("india_fb_heatmap.pdf", height = 7, width = 7)
print(heatMap(spp.data.fb, IN1, col = "blue", main = "Facebook"))
dev.off()

proj4string(spp.data.qt) <- proj4string(IN1)
pdf("india_qt_heatmap.pdf", height = 7, width = 7)
print(heatMap(spp.data.qt, IN1, col = "green", main = "Qualtrics"))
dev.off()

##############################################
# Main Text Figure 3: Southern India Heat Maps
##############################################

# Subset to Kerala and Tamil Nadu
southern <- subset(IN2, NAME_1 %in% c("Kerala", "Tamil Nadu"))

# Plot choropleth map for southern tip
# For each sample: label the points with the CRS string of the subset polygons
# (this assigns the CRS, it does not reproject), open the PDF device, draw, close.
# heatMap() returns a lattice (trellis) object, which is only drawn when printed;
# the explicit print() keeps the PDFs from coming out empty when this script is
# run through source(), where top-level values are not auto-printed.
proj4string(spp.data.mt) <- proj4string(southern)
pdf("i_south_mt_heatmap.pdf", height = 7, width = 7)
print(heatMap(spp.data.mt, southern, col = "red", main = "MTurk"))
dev.off()

proj4string(spp.data.fb) <- proj4string(southern)
pdf("i_south_fb_heatmap.pdf", height = 7, width = 7)
print(heatMap(spp.data.fb, southern, col = "blue", main = "Facebook"))
dev.off()

proj4string(spp.data.qt) <- proj4string(southern)
pdf("i_south_qt_heatmap.pdf", height = 7, width = 7)
print(heatMap(spp.data.qt, southern, col = "green", main = "Qualtrics"))
dev.off()
